{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ab770ffd-438f-40c3-a1d6-e81155dcab26",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "from scipy.stats import shapiro\n",
    "import numpy as np\n",
    "\n",
    "# 选择图片和设置路径\n",
    "target = 3\n",
    "data_name = ['0618', '0854', '1066'][target - 1]\n",
    "\n",
    "# 设置 matplotlib 使用支持中文的字体\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']\n",
    "plt.rcParams['axes.unicode_minus'] = False\n",
    "\n",
    "column_names = ['R_1_1', 'G_1_2', 'B_1_3', 'R_2_1', 'G_2_2', 'B_2_3', 'R_3_1', 'G_3_2', 'B_3_3',\n",
    "                'R_4_1', 'G_4_2', 'B_4_3', 'R_5_1', 'G_5_2', 'B_5_3', 'R_6_1', 'G_6_2', 'B_6_3',\n",
    "                'R_7_1', 'G_7_2', 'B_7_3', 'R_8_1', 'G_8_2', 'B_8_3', 'R_9_1', 'G_9_2', 'B_9_3', 'Label']\n",
    "\n",
    "data = pd.read_csv(f'../RGB_data/UniformSampler_{data_name}_3x3_multi_labelme.csv', skiprows=1, names=column_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a1c5d837-0f8a-4ac2-8c79-13b891555446",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "finish\n",
      "finish\n",
      "finish\n"
     ]
    }
   ],
   "source": [
    "# 去除标签列，仅保留RGB特征\n",
    "features = data.iloc[:, :-1]\n",
    "\n",
    "# 将所有特征列转换为数值类型，无法转换的设为 NaN\n",
    "features = features.apply(pd.to_numeric, errors='coerce')\n",
    "\n",
    "# 检查并移除包含 NaN 的行\n",
    "features = features.dropna()\n",
    "\n",
    "# 设置画图风格\n",
    "sns.set(style=\"whitegrid\")\n",
    "\n",
    "# 指定要保存的列索引，分别对应第6、15、23列（索引为5、14、22）\n",
    "save_indices = [5, 14, 22]  \n",
    "\n",
    "for i in save_indices:\n",
    "    col = features.columns[i]\n",
    "    \n",
    "    # 进行Shapiro-Wilk正态性检验\n",
    "    stat, p_value = shapiro(features[col])\n",
    "    \n",
    "    # 创建单独的图\n",
    "    plt.figure(figsize=(5, 4))\n",
    "    \n",
    "    # 绘制直方图和核密度估计\n",
    "    sns.histplot(features[col], bins=20, kde=True)\n",
    "    plt.title(f\"Distribution of {col}\")\n",
    "    plt.xlabel(\"Features\")\n",
    "    plt.ylabel(\"Frequency\")\n",
    "    \n",
    "    # 添加p值和正态分布判断\n",
    "    normality_status = \"Normal\" if p_value > 0.05 else \"Not Normal\"\n",
    "    plt.text(0.5, 0.8, f\"p = {p_value:.4f}\\n{normality_status}\",\n",
    "             ha='center', va='center', transform=plt.gca().transAxes,\n",
    "             fontsize=10, color='red', bbox=dict(facecolor='white', alpha=0.7))\n",
    "    \n",
    "    # 保存图像\n",
    "    plt.savefig(f\"././Is_Gaussian_img/distribution_{data_name}_{col}.png\", bbox_inches='tight')\n",
    "    plt.close()  # 关闭当前图，避免多余图像显示\n",
    "    print('finish')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fec6be5-fae5-4303-9ede-328ba6ac0604",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
